library(data.table)
library(ggplot2)
library(caret)
library(nnet)
library(pls)
library(FactoMineR)
library(qqman)

import des datas

data(Phenotype)
data(Genomic)
data("localisation")
pheno <- as.data.table(Phenotype)
loc_df<- as.data.table(localisation)
geno<- as.data.table(Genomic)
geno[, ID := rownames(Genomic)]
pheno[, ID := rownames(Phenotype)]

merged<- merge(pheno, loc_df[,list(Population,ID)], by = "ID")
merged<- merge(merged, geno, by = "ID")

LM (Pheno ~ population )


df <- as.data.table(merged)

phenos <- names(df)[sapply(df, is.numeric)]
phenos <- setdiff(phenos, c("class_index","Lat","Lgn","Elev","ID"))  

get_r2 <- function(var) {
  form<- as.formula(paste(var, "~ Population + Elev + Lat + Lgn "))
  mod<- lm(form, data = df)
  summary(mod)$r.squared
}

r2_values<- data.table(
  phenotype = phenos,
  r2= sapply(phenos, get_r2)

)
Error in eval(predvars, data, env) : object 'Elev' not found

ggplot(r2_values, aes(x = reorder(phenotype, r2), y = r2)) +
  geom_point(size = 3) +
  geom_segment(aes(x = phenotype, xend = phenotype, y = 0, yend = r2)) +
  coord_flip() +
  ylab("R² lm(Phénotype ~ Population)") +
  xlab("Caractère phénotypique") +
  theme_minimal()

Data visualisation

Groupes de variables : chromosomes, phénotypes, populations

constructions des axes : chromosomes => on sépare les individus selon leur distances génomiques et on projette en supplémentaires les phénotypes / population

on pourra voir quels chromosomes ne sont pas dutout corrélés avec des traits phénotypiques

Réccupération des positions :

geno_mat <- as.matrix(geno)
coords <- do.call(rbind, strsplit(colnames(geno), "_"))
Warning: number of columns of result is not a multiple of vector length (arg 1)
chrom <- as.factor(coords[,1])
pos <- as.numeric(coords[,2])
Warning: NAs introduced by coercion

PCA

pca <- PCA(merged[,2:217022], quali.sup=21 ,quanti.sup=(2:20) )

plot(pca,choix="ind",habillage=21)

plot(pca,choix="var", #invisible = c("quali","var"),
     label = c("quanti.sup"),
     select = "coord 10" )

MixOmic

sPCA

plotIndiv(result.spca.multi)  

plotVar(result.spca.multi)   
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
Please use tidy evaluation idioms with `aes()`. 
See also `vignette("ggplot2-in-packages")` for more information.

# extract the variables used to construct the first PC
selectVar(result.spca.multi, comp = 1)$name 
 [1] "Chr02_6422161"  "Chr02_6427114"  "Chr02_6427264"  "Chr02_6416381"  "Chr13_15625954"
 [6] "Chr13_15574007" "Chr13_15627568" "Chr07_7181966"  "Chr13_15626544" "Chr02_6382504" 
[11] "Chr13_15635566" "Chr02_6422861"  "Chr02_6423252"  "Chr02_6426926"  "Chr12_3267410" 
[16] "Chr12_3264433"  "Chr12_3267439"  "Chr12_3264479"  "Chr10_20127228" "Chr01_42022246"
[21] "Chr17_7247760"  "Chr10_20121818" "Chr10_20126974" "Chr17_5836683"  "Chr02_6325961" 
[26] "Chr03_18516527" "Chr06_22417885" "Chr02_6332720"  "Chr11_17347389" "Chr06_24641715"
[31] "Chr11_17347361" "Chr14_13405721" "Chr11_17347960" "Chr01_14722117" "Chr06_24641246"
[36] "Chr03_11418460" "Chr03_11422148" "Chr01_14722377" "Chr03_15711505" "Chr11_4789295" 
[41] "Chr01_14717384" "Chr15_9806504"  "Chr17_14258124" "Chr17_14265429" "Chr07_12991228"
[46] "Chr07_12991382" "Chr01_8617466"  "Chr06_20423555" "Chr07_12997322" "Chr01_14708394"
# depict weight assigned to each of these variables
plotLoadings(result.spca.multi, method = 'mean', contrib = 'max')

block PLS

pls.result <- mixOmics::pls(X, Y) 
Warning: the standard deviation is zero
plotIndiv(pls.result) 

plotIndiv(pls.result, group = merged$Population, 
          rep.space = 'XY-variate', 
          ellipse = TRUE,  # plot using the ellipses
          legend = TRUE)

block sPLS

head(merged[,2:21])
plotIndiv(spls.result, group = merged$Population, 
          rep.space = 'XY-variate', 
          ellipse = TRUE,  # plot using the ellipses
          legend = TRUE)

# Identifier les coefficients non nuls
nonzero_idx <- loadX != 0
df_nonzero_X <- data.frame(
  SNP = names(loadX)[nonzero_idx],
  loading  = loadX[nonzero_idx]
)
coords <- do.call(rbind, strsplit(rownames(df_nonzero_X), "_"))
coords[,1] <- as.numeric(sub("Chr", "", coords[,1]))
pos <- as.numeric(coords[,2])
merged_nonzero <- cbind(coords,df_nonzero_X)
merged_nonzero$"2" <- as.numeric(merged_nonzero$"2")
merged_nonzero$"1" <- as.numeric(merged_nonzero$"1")
merged_nonzero$loading <- abs(merged_nonzero$loading)
manhattan(merged_nonzero, chr = "1", bp = "2", p = "loading",snp = "SNP",
          col = c("blue4", "orange3"),  # couleurs alternées par chromosome
          cex = 0.6)  # taille des points

block PLS_DA

# use the mirna, mrna and protein expression levels as predictive datasets
# note that each dataset is measured across the same individuals (samples)

X1<- as.matrix(merged[,23:217022])
X1 <- X1[, apply(X1, 2, sd) != 0]
X2 <- as.matrix(merged[,2:21])

Y <- as.factor(merged$Population)

X <- list(geno = X1, pheno = X2)
result.diablo.tcga <- block.plsda(X, Y) # run the method
plotIndiv(result.diablo.tcga) # plot the samples

plotVar(result.diablo.tcga) # plot the variables
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
Please use tidy evaluation idioms with `aes()`. 
See also `vignette("ggplot2-in-packages")` for more information.

plotIndiv(result.diablo.tcga,
          rep.space = 'XY-variate', 
          ellipse = TRUE,  # plot using the ellipses
          legend = TRUE)

Block sPLS DA

# set the number of features to use for the X datasets
list.keepX = list(geno = c(rep(10000,5)), pheno = c(rep(20,5)))
list.keepX
$geno
[1] 10000 10000 10000 10000 10000

$pheno
[1] 20 20 20 20 20
# run the method
result.sparse.diablo.tcga <-  block.splsda(X, Y, keepX = list.keepX, ncomp = 5)
# plot the contributions of each feature to each dimension
plotLoadings(result.sparse.diablo.tcga, ncomp = 5)

plotIndiv(result.sparse.diablo.tcga,ellipse = TRUE) # plot the samples

plotVar(result.sparse.diablo.tcga) # plot the variables

Boostrapped sPLS

library(spls)
y <- merged$CIRC2009  
y <- as.numeric(y)
X <- as.matrix(merged[,23:217022])
X <- X[, apply(X, 2, sd) != 0]
# Split stratifié 
# Ici, on fait un split simple car phénotype continu
train_index <- createDataPartition(y, p = 0.9, list = FALSE)

X_train <- X[train_index, ]
var_threshold <- 1e-8

nonzero_cols <- apply(X_train, 2, var) > var_threshold

length(nonzero_cols)
[1] 206066
X_train <- X_train[, nonzero_cols]

X_test  <- X[-train_index, ]
X_test  <- X_test[, nonzero_cols]






y_train <- y[train_index]

y_test  <- y[-train_index]
eta = seq(0.1,0.9,0.1)
K = c(1:5)
cv.spls( X_train, y_train, fold=5, K, eta, kappa=0.5, select="pls2", fit="simpls",scale.x=FALSE, scale.y=FALSE, plot.it=TRUE )
eta = 0.1 
eta = 0.2 
eta = 0.3 
eta = 0.4 
eta = 0.5 
eta = 0.6 
eta = 0.7 
eta = 0.8 
eta = 0.9 

Optimal parameters: eta = 0.2, K = 4

model <- spls(X_train, y_train, K = 4, eta =0.2 , kappa=0.5, select="pls2", fit="simpls",
scale.x=FALSE, scale.y=FALSE, eps=1e-4, maxstep=100, trace=FALSE)
ped <- predict.spls( model, X_test, type = c("fit","coefficient"))
ped
          [,1]
 [1,] 5.846174
 [2,] 6.110574
 [3,] 5.941446
 [4,] 5.787127
 [5,] 5.939230
 [6,] 5.916100
 [7,] 6.413289
 [8,] 6.578143
 [9,] 6.180510
[10,] 5.820094
[11,] 6.049748
[12,] 7.130326
[13,] 7.241039
[14,] 7.406019
[15,] 7.255352
[16,] 7.195316
[17,] 6.252328
[18,] 5.839857
[19,] 6.072287
f <- spls::ci.spls( model, coverage=0.95, B=1000,
                    plot.it=FALSE, plot.fix="y",
                    plot.var=NA, K=model$K, fit=model$fit )
10 % completed...
20 % completed...
30 % completed...
40 % completed...
50 % completed...
60 % completed...
70 % completed...
80 % completed...
90 % completed...
100 % completed...
names(f)
[1] "cibeta"  "betahat" "lbmat"   "ubmat"  
nonzero_idx <- which(f$betahat != 0)
betahat_nonzero <- f$betahat[nonzero_idx]
length(nonzero_idx)
[1] 49122

MFA

res = MFA(geno_mat, group=c(25974,16646,12284,11790,14769,17004,8327,14040,9840,15964,7483,7403,8040,11123,8486,7629,6747,8267,4876,308), type=c(rep("s",20)), ncp=5, name.group=c("Chr01","Chr02","Chr03","Chr04", "Chr05","Chr06","Chr07","Chr08","Chr09","Chr10", "Chr11","Chr12","Chr13", "Chr14","Chr15","Chr16","Chr17","Chr18","Chr19", "scaffold"))
dim(geno_mat)
[1]    199 217000

PLS : Phenotype ~ all SNP


# Préparation des donnée

y <- pheno$CIRC2009  
y <- as.numeric(y)
X <- as.matrix(geno)  

# Split stratifié 
# Ici, on fait un split simple car phénotype continu
train_index <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_index, ]
X_test  <- X[-train_index, ]
y_train <- y[train_index]
y_test  <- y[-train_index]
#  PLS 

pls_model <- plsr(y_train ~ X_train, ncomp = 20, validation = "CV")
y_pred <- predict(pls_model, newdata = X_test, ncomp = 2)

# Evaluation 
R2_test <- cor(y_test, y_pred)^2
cat("R² sur l'ensemble de test :", round(R2_test, 3), "\n")
R² sur l'ensemble de test : 0.465 
RMSE_test <- sqrt(mean((y_test - y_pred)^2))
cat("RMSEP sur l'ensemble de test :", round(RMSE_test, 3), "\n")
RMSEP sur l'ensemble de test : 0.543 

y_pred_vec <- as.vector(y_pred)

plot_df <- data.frame(
  Observed = y_test,
  Predicted = y_pred_vec,
  Population = merged$Population[-train_index]
)

ggplot(plot_df, aes(x = Observed, y = Predicted, color = Population)) +
  geom_point(alpha = 0.7, size = 2) +
  geom_abline(slope = 1, intercept = 0, color = "black", linetype = "dashed") +
  theme_minimal() +
  labs(title = "Performance PLS : Observé vs Prédit",
       subtitle = paste("R² =", round(cor(plot_df$Observed, plot_df$Predicted)^2, 3)),
       x = "Phénotype Observé",
       y = "Phénotype Prédit",
       color = "Population") +
  theme(plot.title = element_text(face = "bold", size = 14),
        plot.subtitle = element_text(size = 12),
        legend.position = "right")

weights_comp1 <- pls_model$loading.weights[, 1]

weights_abs <- abs(weights_comp1)

top20_idx <- order(weights_abs, decreasing = TRUE)[1:20]
top20_snps <- rownames(pls_model$loading.weights)[top20_idx]
top20_values <- weights_comp1[top20_idx]

data.frame(SNP = top20_snps, Weight = top20_values)
NA
hist(weights_comp1, breaks = seq(min(weights_comp1), max(weights_comp1), length.out = 50),
     main = "Distribution des poids de la 1ère composante",
     xlab = "loadings", col = "lightblue", border = "white")
Error: object 'weights_comp1' not found

PLS Subset SNP

set.seed(123)

# Paramètres
n_iter <- 10
subset_sizes <- c(1000, 5000, 10000, 50000)
ncomp_candidates <- 1:5 

results <- data.frame(
  subset_size = integer(),
  iter = integer(),
  outer_fold = integer(),
  ncomp_opt = integer(),
  R2 = numeric(),
  RMSE = numeric()
)

# Folds externes
outer_folds <- createFolds(y, k = 5, returnTrain = TRUE)

for(n_snps in subset_sizes){
  cat("Taille du subset :", n_snps, "\n")
  
  for(i in 1:n_iter){
    
    # Tirage aléatoire de SNP
    snp_cols <- sample(colnames(X), n_snps)
    X_sub <- X[, snp_cols]
    
    for(f in 1:5){
      
      # Split externe
      train_index <- outer_folds[[f]]
      test_index  <- setdiff(seq_len(nrow(X_sub)), train_index)
      
      X_train <- X_sub[train_index, ]
      X_test  <- X_sub[test_index, ]
      y_train <- y[train_index]
      y_test  <- y[test_index]
      
      # -----------------------------
      # Inner 5-fold CV pour choisir ncomp
      # -----------------------------
      pls_model <- plsr(y_train ~ X_train, ncomp = max(ncomp_candidates), validation = "CV", segments = 5)
      bestncomp = selectNcomp(pls_model,method="onesigma") +1 
      y_pred <- predict(pls_model,  newdata = X_test, ncomp = bestncomp)
      
      # -----------------------------
      # Modèle final sur l'entraînement externe
      # -----------------------------
      final_model <- plsr(y_train ~ X_train, ncomp = bestncomp, validation = "none")
      y_pred <- predict(final_model, newdata = X_test, ncomp = bestncomp)
      
      # Performance externe
      R2_test <- cor(y_test, y_pred)^2
      RMSE_test <- sqrt(mean((y_test - y_pred)^2))
      
      results <- rbind(results, data.frame(
        subset_size = n_snps,
        iter = i,
        outer_fold = f,
        ncomp_opt = bestncomp,
        R2 = R2_test,
        RMSE = RMSE_test
      ))
    }
  }
}
Taille du subset : 1000 
Taille du subset : 5000 
Taille du subset : 10000 
Taille du subset : 50000 
# Boxplots
ggplot(results, aes(x = factor(subset_size), y = R2, fill = factor(subset_size))) +
  geom_boxplot() + theme_minimal() +
  labs(title = "R² selon la taille du sous-ensemble de SNP", x = "Nombre de SNP", y = "R² sur l'échantillon test") +
  scale_fill_brewer(palette = "Set2") + theme(legend.position = "none")


ggplot(results, aes(x = factor(subset_size), y = RMSE, fill = factor(subset_size))) +
  geom_boxplot() + theme_minimal() +
  labs(title = "RMSEP selon la taille du sous-ensemble de SNP", x = "Nombre de SNP", y = "RMSEP") +
  scale_fill_brewer(palette = "Set2") + theme(legend.position = "none")

results$subset_size <- as.factor(results$subset_size)
summary(results)
 subset_size      iter        outer_fold   ncomp_opt          R2              RMSE       
 1000 :50    Min.   : 1.0   Min.   :1    Min.   :2.00   Min.   :0.2440   Min.   :0.4597  
 5000 :50    1st Qu.: 3.0   1st Qu.:2    1st Qu.:2.00   1st Qu.:0.4179   1st Qu.:0.5422  
 10000:50    Median : 5.5   Median :3    Median :2.00   Median :0.4671   Median :0.5686  
 50000:50    Mean   : 5.5   Mean   :3    Mean   :2.33   Mean   :0.4671   Mean   :0.5597  
             3rd Qu.: 8.0   3rd Qu.:4    3rd Qu.:3.00   3rd Qu.:0.5224   3rd Qu.:0.5849  
             Max.   :10.0   Max.   :5    Max.   :3.00   Max.   :0.5994   Max.   :0.6735  
setDT(results)
compact_summary <- results[
  , {
      stats <- function(x) {
        c(
          Min      = min(x, na.rm = TRUE),
          Q1       = quantile(x, 0.25, na.rm = TRUE),
          Median   = median(x, na.rm = TRUE),
          Mean     = mean(x, na.rm = TRUE),
          SD       = sd(x, na.rm = TRUE),
          Q3       = quantile(x, 0.75, na.rm = TRUE),
          Max      = max(x, na.rm = TRUE)
        )
      }
      
      data.table(
        Statistic = names(stats(iter)),
        iter      = stats(iter),
        ncomp_opt = stats(ncomp_opt),
        R2        = stats(R2),
        RMSE      = stats(RMSE)
      )
    },
  by = subset_size
]

compact_summary

PLS Subset SNP | calcul de VIP | nested CV

\[VIP_{j} = \sqrt{ p \frac{\displaystyle \sum_{h=1}^{A} SS_{Y,h}\frac{w_{jh}^{2}}{\lVert w_{h} \rVert^{2}}}{\displaystyle \sum_{h=1}^{A} SS_{Y,h}} }\]

library(pls)
library(data.table)
library(caret)
library(foreach)
library(doParallel)
library(progressr)
set.seed(123)
# Paramètres
n_iter <- 400
subset_size <- 10000     # nombre de SNP aléatoires par itération
ncomp_candidates <- 1:5  # candidats pour ncomp
nfold_outer <- 5
nfold_inner <- 10
# Préparer cluster parallèle
n_cores <- parallel::detectCores() - 2
cl <- makeCluster(n_cores)
registerDoParallel(cl)

# --- Pipeline principal ---

out <- foreach(i = 1:n_iter, .packages = c("pls","caret","data.table")) %dopar% {

  # 1) Tirage aléatoire SNP
  snp_cols <- sample(colnames(X), subset_size)
  X_sub <- X[, snp_cols]

  # 2) Split en folds externes
  folds <- createFolds(y, k = nfold_outer, list = TRUE)

  # Tables locales
  perf_list <- list()
  vip_list  <- list()

  for(f in seq_len(nfold_outer)) {

    test_idx  <- folds[[f]]
    train_idx <- setdiff(seq_len(nrow(X_sub)), test_idx)

    X_train <- X_sub[train_idx, ]
    y_train <- y[train_idx]
    X_test  <- X_sub[test_idx, ]
    y_test  <- y[test_idx]

    # 3) Sélection ncomp via CV interne
    pls_cv <- plsr(y_train ~ X_train,
                   ncomp = max(ncomp_candidates),
                   validation = "CV",
                   segments = nfold_inner)

    best_ncomp <- selectNcomp(pls_cv, method = "onesigma") + 1

    # Ajustement final
    pls_model <- plsr(y_train ~ X_train, ncomp = best_ncomp)

    # 4) Performance test
    y_pred <- predict(pls_model, newdata = X_test, ncomp = best_ncomp)
    R2_test  <- cor(y_test, y_pred)^2
    RMSE_test <- sqrt(mean((y_test - y_pred)^2))

    # 5) VIP
    vip <- calc_vip(pls_model)

    # 6) Stockage séparé

    # performances fold-level
    perf_list[[f]] <- data.table(
      iter = i,
      fold = f,
      ncomp = best_ncomp,
      R2_test = R2_test,
      RMSE_test = RMSE_test
    )

    # VIP SNP-level
    vip_list[[f]] <- data.table(
      SNP = names(vip),
      VIP = as.numeric(vip),
      iter = i,
      fold = f
    )
  }

  list(
    perf = rbindlist(perf_list),
    vip  = rbindlist(vip_list)
  )
}

stopCluster(cl)

# Reconstruction finale dans l'ordre
results_perf <- rbindlist(lapply(out, `[[`, "perf"))
results_vip  <- rbindlist(lapply(out, `[[`, "vip"))
results_perf
vip_perf<- results_perf[, .(R2_test_mean = mean(R2_test)), by = iter]
vip_perf_fold<- results_perf[, .(R2_test_mean = mean(R2_test)), by = fold]
ggplot(results_perf[1:1000,], aes(x = factor(iter), y = R2_test)) +
  geom_violin(fill = "skyblue", color = "black") +
  labs(x = "Itération", y = expression(R^2), title = "R² 10-outer-fold ") +
  theme_minimal()

mean(vip_perf_fold$R2_test_mean)
[1] 0.4547343
vip_perf_fold
library(data.table)
library(ggplot2)


# Calcul du VIP moyen par SNP
vip_mean<- results_vip[, .(VIP_mean = mean(VIP),SNP_count = .N), by = SNP]
# Sélection des 30 SNP ayant le VIP moyen le plus élevé
topN<- vip_mean[order(-VIP_mean)][0:5000, SNP]

df_topNPLS<- results_vip[SNP %in% topN]
df_topNPLS <- merge(df_topNPLS, vip_mean[, .(SNP, SNP_count)], by = "SNP")
df_topNPLS$SNP_count <- df_topNPLS$SNP_count/10
ggplot(df_topNPLS, aes(x = reorder(SNP,VIP), y = VIP, fill = SNP_count)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.1, outlier.size = 0.5) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  ) +
  labs(
    x = "SNP",
    y = "VIP",
    title = "Distribution du VIP pour les n = 50 SNP les plus importants",
    fill = "Nbr de selection du SNP"
  )

hist(vip_mean$VIP_mean, breaks = seq(min(vip_mean$VIP_mean), max(vip_mean$VIP_mean), length.out = 50),
     main = "Distribution des scores VIP moyens",
     xlab = "scores VIP moyens", col = "lightblue", border = "white")

Lasso

library(glmnet)

# Assurer que y est numérique et X est une matrice
y <- as.numeric(pheno$CIRC2009)
X <- as.matrix(geno)

train_index <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_index, ]
X_test  <- X[-train_index, ]
y_train <- y[train_index]
y_test  <- y[-train_index]

# Ajustement du Lasso avec validation croisée pour choisir lambda
set.seed(123)

cv_lasso <- cv.glmnet(X_train, y_train, alpha = 1, nfolds = 5)

# Afficher la meilleure valeur de lambda
best_lambda <- cv_lasso$lambda.min
print(best_lambda)
[1] 0.01043408
# Tracer l'erreur de validation croisée
plot(cv_lasso)


# Ajuster le modèle final avec lambda optimal
lasso_model <- glmnet(X_train, y_train, alpha = 1, lambda = best_lambda)

# Extraire les coefficients
coef(lasso_model)
217001 x 1 sparse Matrix of class "dgCMatrix"
                   s0
(Intercept)  6.531206
Chr01_13832  .       
Chr01_14786  .       
Chr01_14892  .       
Chr01_16212  .       
Chr01_16229  .       
Chr01_16423  .       
Chr01_60631  .       
Chr01_60791  .       
Chr01_61014  .       
Chr01_64402  .       
Chr01_64685  .       
Chr01_64771  .       
Chr01_64878  .       
Chr01_64939  .       
Chr01_66095  .       
Chr01_66700  .       
Chr01_66763  .       
Chr01_66768  .       
Chr01_67017  .       
Chr01_67266  .       
Chr01_67274  .       
Chr01_67423  .       
Chr01_67708  .       
Chr01_68386  .       
Chr01_69150  .       
Chr01_88238  .       
Chr01_88315  .       
Chr01_88376  .       
Chr01_88448  .       
Chr01_88595  .       
Chr01_89812  .       
Chr01_90924  .       
Chr01_91126  .       
Chr01_91137  .       
Chr01_91166  .       
Chr01_91710  .       
Chr01_91776  .       
Chr01_92124  .       
Chr01_106725 .       
Chr01_106745 .       
Chr01_106877 .       
Chr01_107077 .       
Chr01_108678 .       
Chr01_108877 .       
Chr01_109763 .       
Chr01_109880 .       
Chr01_110848 .       
Chr01_111824 .       
Chr01_111900 .       
Chr01_113593 .       
Chr01_113967 .       
Chr01_114929 .       
Chr01_115102 .       
Chr01_115122 .       
Chr01_115855 .       
Chr01_115918 .       
Chr01_116033 .       
Chr01_116138 .       
Chr01_116149 .       
Chr01_116496 .       
Chr01_116529 .       
Chr01_116673 .       
Chr01_116778 .       
Chr01_116825 .       
Chr01_116838 .       
Chr01_117019 .       
Chr01_117028 .       
Chr01_118375 .       
Chr01_118879 .       
Chr01_119119 .       
Chr01_119357 .       
Chr01_119360 .       
Chr01_119712 .       
Chr01_119811 .       
Chr01_119899 .       
Chr01_120015 .       
Chr01_122193 .       
Chr01_123846 .       
Chr01_124409 .       
Chr01_124448 .       
Chr01_126036 .       
Chr01_126037 .       
Chr01_126254 .       
Chr01_126830 .       
Chr01_127858 .       
Chr01_128069 .       
Chr01_128072 .       
Chr01_128430 .       
Chr01_128477 .       
Chr01_131048 .       
Chr01_131078 .       
Chr01_131083 .       
Chr01_131341 .       
Chr01_133610 .       
Chr01_133634 .       
Chr01_133656 .       
Chr01_133689 .       
Chr01_133960 .       
Chr01_134212 .       
Chr01_134973 .       
Chr01_135161 .       
Chr01_135803 .       
Chr01_136014 .       
Chr01_136186 .       
Chr01_136433 .       
Chr01_139710 .       
Chr01_150514 .       
Chr01_150580 .       
Chr01_161422 .       
Chr01_162079 .       
Chr01_162372 .       
Chr01_162390 .       
Chr01_163162 .       
Chr01_163164 .       
Chr01_164513 .       
Chr01_165240 .       
Chr01_165266 .       
Chr01_165518 .       
Chr01_167741 .       
Chr01_167802 .       
Chr01_168463 .       
Chr01_168500 .       
Chr01_168539 .       
Chr01_168555 .       
Chr01_168732 .       
Chr01_168880 .       
Chr01_168957 .       
Chr01_169026 .       
Chr01_169418 .       
Chr01_169527 .       
Chr01_169536 .       
Chr01_169673 .       
Chr01_169812 .       
Chr01_169931 .       
Chr01_169961 .       
Chr01_170073 .       
Chr01_170335 .       
Chr01_172462 .       
Chr01_172544 .       
Chr01_172599 .       
Chr01_172601 .       
Chr01_173004 .       
Chr01_173300 .       
Chr01_173323 .       
Chr01_173344 .       
Chr01_173517 .       
Chr01_199064 .       
Chr01_199073 .       
Chr01_199186 .       
Chr01_199221 .       
Chr01_200793 .       
Chr01_200830 .       
Chr01_200869 .       
Chr01_205964 .       
Chr01_206007 .       
Chr01_206052 .       
Chr01_206157 .       
Chr01_208376 .       
Chr01_208379 .       
Chr01_208419 .       
Chr01_210339 .       
Chr01_210368 .       
Chr01_211650 .       
Chr01_211674 .       
Chr01_211752 .       
Chr01_216506 .       
Chr01_216601 .       
Chr01_216642 .       
Chr01_216682 .       
Chr01_216759 .       
Chr01_216812 .       
Chr01_216982 .       
Chr01_216997 .       
Chr01_225239 .       
Chr01_225272 .       
Chr01_225350 .       
Chr01_225519 .       
Chr01_226505 .       
Chr01_226558 .       
Chr01_226733 .       
Chr01_227542 .       
Chr01_228264 .       
Chr01_228643 .       
Chr01_235333 .       
Chr01_237242 .       
Chr01_238136 .       
Chr01_238138 .       
Chr01_238244 .       
Chr01_238261 .       
Chr01_238318 .       
Chr01_238385 .       
Chr01_238620 .       
Chr01_238676 .       
Chr01_238947 .       
Chr01_238986 .       
Chr01_239362 .       
Chr01_240009 .       
Chr01_240404 .       
Chr01_240464 .       
Chr01_240674 .       
Chr01_240878 .       
Chr01_240926 .       
Chr01_240929 .       
Chr01_241113 .       
Chr01_241164 .       
Chr01_241173 .       
Chr01_241615 .       
Chr01_242818 .       
Chr01_242922 .       
Chr01_243597 .       
Chr01_244158 .       
Chr01_244205 .       
Chr01_244323 .       
Chr01_244703 .       
Chr01_244738 .       
Chr01_244876 .       
Chr01_244896 .       
Chr01_245368 .       
Chr01_245496 .       
Chr01_245546 .       
Chr01_245763 .       
Chr01_246874 .       
Chr01_246913 .       
Chr01_248096 .       
Chr01_248167 .       
Chr01_248173 .       
Chr01_248283 .       
Chr01_248882 .       
Chr01_250235 .       
Chr01_250243 .       
Chr01_250308 .       
Chr01_251674 .       
Chr01_251859 .       
Chr01_260199 .       
Chr01_260293 .       
Chr01_260338 .       
Chr01_260558 .       
Chr01_260760 .       
Chr01_260785 .       
Chr01_263292 .       
Chr01_263493 .       
Chr01_263517 .       
Chr01_263775 .       
Chr01_263790 .       
Chr01_263877 .       
Chr01_263967 .       
Chr01_263976 .       
Chr01_264811 .       
Chr01_265716 .       
Chr01_265878 .       
Chr01_265895 .       
Chr01_265946 .       
Chr01_266320 .       
Chr01_267203 .       
Chr01_268640 .       
Chr01_269409 .       
Chr01_269683 .       
Chr01_279781 .       
Chr01_280963 .       
Chr01_284001 .       
Chr01_284068 .       
Chr01_295470 .       
Chr01_295580 .       
Chr01_295700 .       
Chr01_295779 .       
Chr01_295781 .       
Chr01_296958 .       
Chr01_296972 .       
Chr01_297053 .       
Chr01_297140 .       
Chr01_297206 .       
Chr01_297435 .       
Chr01_297946 .       
Chr01_304505 .       
Chr01_304529 .       
Chr01_305327 .       
Chr01_305804 .       
Chr01_305993 .       
Chr01_306028 .       
Chr01_306036 .       
Chr01_306063 .       
Chr01_306198 .       
Chr01_311559 .       
Chr01_311801 .       
Chr01_311878 .       
Chr01_311937 .       
Chr01_312041 .       
Chr01_312082 .       
Chr01_313076 .       
Chr01_313112 .       
Chr01_313469 .       
Chr01_313562 .       
Chr01_313569 .       
Chr01_313712 .       
Chr01_314006 .       
Chr01_314057 .       
Chr01_314065 .       
Chr01_315360 .       
Chr01_315901 .       
Chr01_316697 .       
Chr01_317341 .       
Chr01_317695 .       
Chr01_317729 .       
Chr01_319056 .       
Chr01_319083 .       
Chr01_319868 .       
Chr01_319888 .       
Chr01_319915 .       
Chr01_320492 .       
Chr01_320502 .       
Chr01_320576 .       
Chr01_320789 .       
Chr01_320797 .       
Chr01_338679 .       
Chr01_338687 .       
Chr01_338761 .       
Chr01_339402 .       
Chr01_340601 .       
Chr01_340608 .       
Chr01_340653 .       
Chr01_369736 .       
Chr01_369774 .       
Chr01_370205 .       
Chr01_370217 .       
Chr01_370278 .       
Chr01_370607 .       
Chr01_370664 .       
Chr01_370778 .       
Chr01_371488 .       
Chr01_371644 .       
Chr01_371838 .       
Chr01_400068 .       
Chr01_400143 .       
Chr01_400299 .       
Chr01_400340 .       
Chr01_400526 .       
Chr01_401736 .       
Chr01_402285 .       
Chr01_402306 .       
Chr01_402361 .       
Chr01_402378 .       
Chr01_402891 .       
Chr01_402910 .       
Chr01_402963 .       
Chr01_402975 .       
Chr01_403053 .       
Chr01_406351 .       
Chr01_406432 .       
Chr01_406987 .       
Chr01_407317 .       
Chr01_408455 .       
Chr01_431702 .       
Chr01_432558 .       
Chr01_432562 .       
Chr01_432602 .       
Chr01_432797 .       
Chr01_433004 .       
Chr01_433056 .       
Chr01_433109 .       
Chr01_434874 .       
Chr01_444758 .       
Chr01_446327 .       
Chr01_446560 .       
Chr01_446565 .       
Chr01_446856 .       
Chr01_446894 .       
Chr01_446916 .       
Chr01_448313 .       
Chr01_451121 .       
Chr01_451462 .       
Chr01_451737 .       
Chr01_451976 .       
Chr01_452075 .       
Chr01_452106 .       
Chr01_452963 .       
Chr01_453086 .       
Chr01_453180 .       
Chr01_453186 .       
Chr01_470430 .       
Chr01_470456 .       
Chr01_470478 .       
Chr01_471384 .       
Chr01_471736 .       
Chr01_471754 .       
Chr01_472207 .       
Chr01_472809 .       
Chr01_473343 .       
Chr01_473391 .       
Chr01_473691 .       
Chr01_474251 .       
Chr01_474308 .       
Chr01_474316 .       
Chr01_474759 .       
Chr01_474777 .       
Chr01_475150 .       
Chr01_475418 .       
Chr01_475445 .       
Chr01_475465 .       
Chr01_508685 .       
Chr01_508828 .       
Chr01_508875 .       
Chr01_509166 .       
Chr01_509417 .       
Chr01_514737 .       
Chr01_514962 .       
Chr01_514981 .       
Chr01_515044 .       
Chr01_515196 .       
Chr01_515306 .       
Chr01_536676 .       
Chr01_536704 .       
Chr01_536746 .       
Chr01_536827 .       
Chr01_537154 .       
Chr01_537704 .       
Chr01_537731 .       
Chr01_538073 .       
Chr01_538209 .       
Chr01_538541 .       
Chr01_538817 .       
Chr01_539555 .       
Chr01_539765 .       
Chr01_540424 .       
Chr01_541402 .       
Chr01_541442 .       
Chr01_543284 .       
Chr01_543303 .       
Chr01_543345 .       
Chr01_543348 .       
Chr01_543945 .       
Chr01_543970 .       
Chr01_544090 .       
Chr01_544686 .       
Chr01_545628 .       
Chr01_545704 .       
Chr01_546448 .       
Chr01_546819 .       
Chr01_546836 .       
Chr01_548065 .       
Chr01_548074 .       
Chr01_548116 .       
Chr01_548128 .       
Chr01_548636 .       
Chr01_548666 .       
Chr01_548679 .       
Chr01_548853 .       
Chr01_548856 .       
Chr01_548897 .       
Chr01_553635 .       
Chr01_556076 .       
Chr01_556248 .       
Chr01_556289 .       
Chr01_556295 .       
Chr01_556505 .       
Chr01_556601 .       
Chr01_556625 .       
Chr01_556655 .       
Chr01_556725 .       
Chr01_556811 .       
Chr01_557461 .       
Chr01_557631 .       
Chr01_557732 .       
Chr01_557840 .       
Chr01_557848 .       
Chr01_566889 .       
Chr01_570769 .       
Chr01_570774 .       
Chr01_570874 .       
Chr01_572406 .       
Chr01_572439 .       
Chr01_572479 .       
Chr01_572518 .       
Chr01_573503 .       
Chr01_579281 .       
Chr01_579332 .       
Chr01_579609 .       
Chr01_579610 .       
Chr01_579889 .       
Chr01_587929 .       
Chr01_588113 .       
Chr01_588167 .       
Chr01_588178 .       
Chr01_588228 .       
Chr01_590503 .       
Chr01_590525 .       
Chr01_590538 .       
Chr01_590772 .       
Chr01_590777 .       
Chr01_590808 .       
Chr01_590919 .       
Chr01_590982 .       
Chr01_591222 .       
Chr01_591245 .       
Chr01_591360 .       
Chr01_591495 .       
Chr01_591514 .       
Chr01_591573 .       
Chr01_591609 .       
Chr01_591640 .       

 ..............................
 ........suppressing 216001 rows in show(); maybe adjust options(max.print=, width=)
 ..............................
                              s0
Chr19_15274658       .          
Chr19_15276166       .          
Chr19_15277295       .          
Chr19_15277351       .          
Chr19_15277398       .          
Chr19_15277503       .          
Chr19_15277526       .          
Chr19_15277548       .          
Chr19_15277652       .          
Chr19_15277677       .          
Chr19_15289229       .          
Chr19_15289241       .          
Chr19_15289370       .          
Chr19_15289394       .          
Chr19_15289784       .          
Chr19_15291310       .          
Chr19_15293126       .          
Chr19_15294107       .          
Chr19_15316651       .          
Chr19_15316714       .          
Chr19_15316792       .          
Chr19_15316870       .          
Chr19_15318948       .          
Chr19_15318997       .          
Chr19_15319097       .          
Chr19_15319250       .          
Chr19_15319253       .          
Chr19_15319328       .          
Chr19_15319495       .          
Chr19_15334247       .          
Chr19_15334261       .          
Chr19_15334575       .          
Chr19_15334608       .          
Chr19_15334765       .          
Chr19_15334817       .          
Chr19_15335034       .          
Chr19_15335055       .          
Chr19_15335894       .          
Chr19_15335986       .          
Chr19_15340197       .          
Chr19_15340381       .          
Chr19_15341421       .          
Chr19_15341474       .          
Chr19_15341661       .          
Chr19_15341678       .          
Chr19_15342451       .          
Chr19_15352241       .          
Chr19_15352335       .          
Chr19_15352484       .          
Chr19_15352522       .          
Chr19_15352573       .          
Chr19_15352595       .          
Chr19_15352735       .          
Chr19_15353439       .          
Chr19_15353476       .          
Chr19_15353566       .          
Chr19_15358298       .          
Chr19_15359270       .          
Chr19_15359502       .          
Chr19_15360407       .          
Chr19_15371485       .          
Chr19_15372372       .          
Chr19_15372396       .          
Chr19_15372659       .          
Chr19_15377848       .          
Chr19_15377885       .          
Chr19_15377905       .          
Chr19_15377922       .          
Chr19_15377959       .          
Chr19_15377962       .          
Chr19_15379530       .          
Chr19_15379600       .          
Chr19_15379829       .          
Chr19_15380189       .          
Chr19_15380807       .          
Chr19_15380830       .          
Chr19_15380872       .          
Chr19_15380915       .          
Chr19_15380923       .          
Chr19_15380960       .          
Chr19_15380997       .          
Chr19_15382854       .          
Chr19_15383050       .          
Chr19_15383311       .          
Chr19_15383473       .          
Chr19_15383596       .          
Chr19_15384099       .          
Chr19_15384265       .          
Chr19_15384433       .          
Chr19_15384447       .          
Chr19_15384486       .          
Chr19_15384538       .          
Chr19_15384621       .          
Chr19_15384672       .          
Chr19_15384675       .          
Chr19_15384779       .          
Chr19_15384826       .          
Chr19_15384867       .          
Chr19_15385000       .          
Chr19_15385014       .          
Chr19_15385381       .          
Chr19_15386763       .          
Chr19_15386818       .          
Chr19_15386854       .          
Chr19_15386969       .          
Chr19_15418689       .          
Chr19_15418730       .          
Chr19_15418838       .          
Chr19_15418958       .          
Chr19_15419078       .          
Chr19_15419690       .          
Chr19_15419691       .          
Chr19_15421100       .          
Chr19_15421355       .          
Chr19_15422439       .          
Chr19_15422450       .          
Chr19_15424787       .          
Chr19_15424830       .          
Chr19_15424897       .          
Chr19_15424913       .          
Chr19_15425552       .          
Chr19_15434789       .          
Chr19_15437784       .          
Chr19_15437821      -0.033932369
Chr19_15437831       .          
Chr19_15438414       .          
Chr19_15438511       .          
Chr19_15439971       .          
Chr19_15440585       .          
Chr19_15440619       .          
Chr19_15440801       .          
Chr19_15441107       .          
Chr19_15441113       .          
Chr19_15457182       .          
Chr19_15458856       .          
Chr19_15473567       .          
Chr19_15473575       .          
Chr19_15473715       .          
Chr19_15473717       .          
Chr19_15473794       .          
Chr19_15476673       .          
Chr19_15486585       .          
Chr19_15486854       .          
Chr19_15488374       .          
Chr19_15488411       .          
Chr19_15488990       .          
Chr19_15490089       .          
Chr19_15490794       .          
Chr19_15490946       .          
Chr19_15491401       .          
Chr19_15491454       .          
Chr19_15491703       .          
Chr19_15491745       .          
Chr19_15491772       .          
Chr19_15491835       .          
Chr19_15501531       .          
Chr19_15501584       .          
Chr19_15501764       .          
Chr19_15503678       .          
Chr19_15503679       .          
Chr19_15503800       .          
Chr19_15503823       .          
Chr19_15503855       .          
Chr19_15504437       .          
Chr19_15504489       .          
Chr19_15504754       .          
Chr19_15504959       .          
Chr19_15504976       .          
Chr19_15505148       .          
Chr19_15512415       .          
Chr19_15512455       .          
Chr19_15512728       .          
Chr19_15512751       .          
Chr19_15512804       .          
Chr19_15512923       .          
Chr19_15514260       .          
Chr19_15514280       .          
Chr19_15514315       .          
Chr19_15514437       .          
Chr19_15514445       .          
Chr19_15514495       .          
Chr19_15514675       .          
Chr19_15514735       .          
Chr19_15514739       .          
Chr19_15514801       .          
Chr19_15517327       .          
Chr19_15517864       .          
Chr19_15519861       .          
Chr19_15520446       .          
Chr19_15521481       .          
Chr19_15521657       .          
Chr19_15522415       .          
scaffold_104_5033    .          
scaffold_104_6585    .          
scaffold_104_6597    .          
scaffold_104_17593   .          
scaffold_104_17629   .          
scaffold_104_17813   .          
scaffold_104_18346   .          
scaffold_104_20116   .          
scaffold_104_21900   .          
scaffold_104_22333   .          
scaffold_104_22340   .          
scaffold_104_22697   .          
scaffold_104_24631   .          
scaffold_104_25465   .          
scaffold_104_25504   .          
scaffold_104_176220  .          
scaffold_104_176221  .          
scaffold_104_176303  .          
scaffold_104_176365  .          
scaffold_104_176406  .          
scaffold_104_176500  .          
scaffold_104_176531  .          
scaffold_104_176588  .          
scaffold_104_176762  .          
scaffold_104_176819  .          
scaffold_1865_26377  .          
scaffold_1865_26378  .          
scaffold_1865_27086  .          
scaffold_1865_28208  .          
scaffold_1865_28230  .          
scaffold_1865_28679  .          
scaffold_1865_28685  .          
scaffold_1865_28747  .          
scaffold_1865_40702  .          
scaffold_1865_40712  .          
scaffold_1865_40749  .          
scaffold_1865_41180  .          
scaffold_1865_41236  .          
scaffold_1865_41275  .          
scaffold_1865_41282  .          
scaffold_1865_43041  .          
scaffold_1865_43059  .          
scaffold_25_72794    .          
scaffold_25_72850    .          
scaffold_25_72914    .          
scaffold_25_73404    .          
scaffold_25_73405    .          
scaffold_25_73441    .          
scaffold_25_73952    .          
scaffold_25_74812    .          
scaffold_25_74917    .          
scaffold_25_74982    .          
scaffold_25_76337    .          
scaffold_25_76608    .          
scaffold_25_76646    .          
scaffold_25_76744    .          
scaffold_25_86551    .          
scaffold_25_92220    .          
scaffold_25_92768    .          
scaffold_25_110434   .          
scaffold_25_110440   .          
scaffold_25_110820   .          
scaffold_25_110827   .          
scaffold_25_120819   .          
scaffold_25_121057   .          
scaffold_25_121072   .          
scaffold_25_121305   .          
scaffold_25_184475   .          
scaffold_25_186525   .          
scaffold_25_186530   .          
scaffold_25_186615   .          
scaffold_25_187603   .          
scaffold_25_187820   .          
scaffold_25_187861   .          
scaffold_25_188640   .          
scaffold_25_188859   .          
scaffold_25_189004   .          
scaffold_25_189792   .          
scaffold_25_189823   .          
scaffold_25_192092   .          
scaffold_25_192107   .          
scaffold_25_192610   .          
scaffold_25_192625   .          
scaffold_25_192656   .          
scaffold_25_192676   .          
scaffold_25_193419   .          
scaffold_25_193683   .          
scaffold_25_194247   .          
scaffold_25_194700   .          
scaffold_25_195279   .          
scaffold_25_195553   .          
scaffold_25_195576   .          
scaffold_25_195727   .          
scaffold_25_195826   .          
scaffold_25_195935   .          
scaffold_25_195978   .          
scaffold_25_196017   .          
scaffold_25_196026   .          
scaffold_25_196065   .          
scaffold_25_196074   .          
scaffold_25_196122   .          
scaffold_25_196152   .          
scaffold_25_196738   .          
scaffold_25_200665   .          
scaffold_25_201203   .          
scaffold_25_201424   .          
scaffold_25_201439   .          
scaffold_25_201684   .          
scaffold_25_202021   .          
scaffold_25_202278   .          
scaffold_25_202553   .          
scaffold_25_202556   .          
scaffold_25_202829   .          
scaffold_25_202944   .          
scaffold_25_203105   .          
scaffold_25_213867   .          
scaffold_25_214314   .          
scaffold_25_214384   .          
scaffold_25_214386   .          
scaffold_25_214664   .          
scaffold_25_214710   .          
scaffold_25_214711   .          
scaffold_25_214750   .          
scaffold_25_214802   .          
scaffold_25_214835   .          
scaffold_25_214879   .          
scaffold_25_214891   .          
scaffold_25_238236   .          
scaffold_25_238265   .          
scaffold_25_238492   .          
scaffold_25_238666   .          
scaffold_25_238715   .          
scaffold_25_238720   .          
scaffold_25_238751   .          
scaffold_25_238772   .          
scaffold_25_238807   .          
scaffold_25_238849   .          
scaffold_25_238883   .          
scaffold_25_238885   .          
scaffold_25_238940   .          
scaffold_25_239099   .          
scaffold_25_244645   .          
scaffold_25_244699   .          
scaffold_25_244941   .          
scaffold_25_244954   .          
scaffold_25_246819   .          
scaffold_25_246861   .          
scaffold_25_246877   .          
scaffold_25_246903   .          
scaffold_25_247101   .          
scaffold_25_247812   .          
scaffold_25_247815   .          
scaffold_25_247920   .          
scaffold_25_248231   .          
scaffold_25_248285   .          
scaffold_25_248515   .          
scaffold_25_248624   .          
scaffold_25_248643   .          
scaffold_25_248666   .          
scaffold_25_258188   .          
scaffold_25_258251   .          
scaffold_25_258300   .          
scaffold_25_258407   .          
scaffold_25_258560   .          
scaffold_25_258577   .          
scaffold_25_258959   .          
scaffold_25_259026   .          
scaffold_25_259277   .          
scaffold_25_259346   .          
scaffold_25_260131   .          
scaffold_25_272159   .          
scaffold_25_406100   .          
scaffold_25_411423   .          
scaffold_25_411483   .          
scaffold_25_411508   .          
scaffold_25_531736   .          
scaffold_25_556134   .          
scaffold_25_556197   .          
scaffold_25_556208   .          
scaffold_25_556263   .          
scaffold_25_556391   .          
scaffold_25_557158   .          
scaffold_25_557452   .          
scaffold_25_558898   .          
scaffold_25_558908   .          
scaffold_25_559074   .          
scaffold_25_559096   .          
scaffold_25_559174   .          
scaffold_25_559444   .          
scaffold_25_567330   .          
scaffold_25_606334   .          
scaffold_25_606358   .          
scaffold_25_607239   .          
scaffold_25_607263   .          
scaffold_25_636758   .          
scaffold_25_636802   .          
scaffold_25_637384   .          
scaffold_25_637857   .          
scaffold_25_638111   .          
scaffold_25_638153   .          
scaffold_25_638309   .          
scaffold_25_638360   .          
scaffold_25_638455   .          
scaffold_25_638579   .          
scaffold_25_638599   .          
scaffold_25_638831   .          
scaffold_25_638834   .          
scaffold_25_638879   .          
scaffold_25_639332   .          
scaffold_25_639378   .          
scaffold_25_640014   .          
scaffold_25_640025   .          
scaffold_25_640277   .          
scaffold_502_195     .          
scaffold_502_17929   .          
scaffold_502_59179   .          
scaffold_502_59192   .          
scaffold_502_59486   .          
scaffold_502_59521   .          
scaffold_502_67629   .          
scaffold_502_68173   .          
scaffold_502_68182   .          
scaffold_502_96963   .          
scaffold_502_106022  .          
scaffold_502_106041  .          
scaffold_502_106416  .          
scaffold_502_106535  .          
scaffold_502_120281  .          
scaffold_509_28598   .          
scaffold_509_29484   .          
scaffold_509_30239   .          
scaffold_509_30252   .          
scaffold_509_31139   .          
scaffold_509_59662   0.008606299
scaffold_509_61199   .          
scaffold_509_64533   .          
scaffold_509_64602   .          
scaffold_509_64644   .          
scaffold_509_65419   .          
scaffold_509_92801   .          
scaffold_509_92886   .          
scaffold_509_92919   .          
scaffold_509_92941   .          
scaffold_509_93157   .          
scaffold_67_63206    .          
scaffold_67_77350    .          
scaffold_67_77432    .          
scaffold_67_77466    .          
scaffold_67_77714    .          
scaffold_67_78709    .          
scaffold_67_87043    .          
scaffold_67_87169    .          
scaffold_67_90371    .          
scaffold_67_98980    .          
scaffold_67_99999    .          
scaffold_67_100018   .          
scaffold_67_100036   .          
scaffold_67_103973   .          
scaffold_67_103993   .          
scaffold_67_104506   .          
scaffold_67_104535   .          
scaffold_67_104578   .          
scaffold_67_131620   .          
scaffold_67_131649   .          
scaffold_67_131655   .          
scaffold_67_132628   .          
scaffold_67_132652   .          
scaffold_67_132677   .          
scaffold_67_145113   .          
scaffold_67_145184   .          
scaffold_67_146362   .          
scaffold_67_146854   .          
scaffold_67_146877   .          
scaffold_67_169880   .          
scaffold_780_11876   .          
scaffold_780_11892   .          
scaffold_780_50549   .          
scaffold_780_50589   .          
scaffold_780_75954   .          
scaffold_780_75965   .          
scaffold_780_83278   .          
scaffold_780_85135   .          
scaffold_780_87469   .          
scaffold_780_87488   .          
scaffold_780_90170   .          
scaffold_820_18504   .          
scaffold_820_65666   .          
scaffold_820_66347   .          
scaffold_820_66388   .          
scaffold_820_66432   .          
scaffold_820_68792   .          
scaffold_820_68844   .          
scaffold_820_68938   .          
scaffold_820_68951   .          
scaffold_820_68982   .          
scaffold_820_69022   .          
scaffold_820_69553   .          
scaffold_820_69665   .          
scaffold_820_69977   .          
scaffold_820_76153   .          
scaffold_820_76500   .          
scaffold_820_78478   .          
scaffold_820_78596   .          
scaffold_820_79852   .          
scaffold_820_80063   .          
scaffold_820_80326   .          
scaffold_820_80332   .          
scaffold_820_80368   .          
print(best_lambda)
[1] 0.01043408
y_pred <- predict(lasso_model, newx = X_test)

# Calcul du R²
SSE <- sum((y_test - y_pred)^2)           # somme des carrés des erreurs
SST <- sum((y_test - mean(y))^2)         # somme totale des carrés
R2 <- 1 - SSE/SST
print(sqrt(SSE))
[1] 4.11137
R2
[1] 0.3508067
coef_df <- data.frame(
  SNP = rownames(coef(lasso_model)),
  Coefficient = as.numeric(coef(lasso_model))
)
coef_nonzero <- coef_df[coef_df$Coefficient != 0, ]
coef_nonzero_dt <- as.data.table(coef_nonzero)

# Top N SNP par valeur absolue des coefficients
N <- 50
coef_nonzero_dt[, abs_coef := abs(Coefficient)]
topN_Lasso <- coef_nonzero_dt[order(-abs_coef)][1:N, .(SNP, Coefficient)]

topN_Lasso

Comparaison selection Lasso et PLS


common_snps <- intersect(df_topNPLS$SNP, coef_nonzero$SNP)

# SNP communs
common_snps
 [1] "Chr01_1834461"  "Chr01_4153800"  "Chr01_42899837" "Chr02_10114463" "Chr02_19743607" "Chr02_22637968"
 [7] "Chr02_2278350"  "Chr02_2872467"  "Chr02_4257956"  "Chr03_10509513" "Chr03_16762734" "Chr03_19851644"
[13] "Chr04_20270150" "Chr04_23177824" "Chr04_5474624"  "Chr04_8190739"  "Chr04_8724191"  "Chr05_20338179"
[19] "Chr05_6142501"  "Chr06_20435964" "Chr07_12245457" "Chr07_12245863" "Chr07_2271365"  "Chr08_10723562"
[25] "Chr08_2817232"  "Chr08_5239720"  "Chr08_7499581"  "Chr08_8269878"  "Chr09_12546352" "Chr09_12827737"
[31] "Chr09_2269452"  "Chr09_9389914"  "Chr09_9389916"  "Chr10_11603279" "Chr10_12391252" "Chr10_13951310"
[37] "Chr10_16864286" "Chr10_20023115" "Chr10_8950463"  "Chr10_8954059"  "Chr10_9695555"  "Chr11_17972339"
[43] "Chr11_18969957" "Chr12_4109852"  "Chr13_11901187" "Chr13_1367659"  "Chr13_1486685"  "Chr13_4758531" 
[49] "Chr14_2168682"  "Chr14_4891599"  "Chr14_5691154"  "Chr14_5767204"  "Chr14_8097754"  "Chr15_9360139" 
[55] "Chr17_10721759" "Chr18_3096178"  "Chr18_8374212"  "Chr18_8375522" 
length(common_snps)
[1] 58

réegression ridge adaptative

y <- as.numeric(pheno$CIRC2009)
X <- as.matrix(geno)

# Split externe train/test
set.seed(123)
train_index <- createDataPartition(y, p = 0.9, list = FALSE)
X_train <- X[train_index, ]
X_test  <- X[-train_index, ]
y_train <- y[train_index]
y_test  <- y[-train_index]

# Split interne du train en D1 et D2 : 50/50
set.seed(123)
idx_D1 <- createDataPartition(y_train, p = 0.5, list = FALSE)

X_D1 <- X_train[idx_D1, ]
X_D2 <- X_train[-idx_D1, ]
y_D1 <- y_train[idx_D1]
y_D2 <- y_train[-idx_D1]

STEP I : SCREENING (Elastic Net sur D1)

alpha <- 1 # Elastic Net si alpha = 0.5, Lasso si = 1, ridge si =0 
set.seed(123)
cv_enet <- cv.glmnet(
  X_D1, y_D1,
  alpha = alpha,               
  nfolds = 10,
  standardize = TRUE
)

lambda_hat <- cv_enet$lambda.min
lambda_hat
[1] 0.2302412
# Ajustement final 
enet_model <- glmnet(X_D1, y_D1, alpha = alpha, lambda = 0.002)


coefs <- coef(enet_model)
selected <- which(coefs[-1] != 0)          # indices des SNP sélectionnés
S_hat <- selected

cat("Nombre de variables sélectionnées (screening):", length(S_hat), "\n")
Nombre de variables sélectionnées (screening): 170 
# Poids dérivés des coefficients Elastic Net
beta_hat <- as.numeric(coefs[-1])
weights <- abs(beta_hat)
weights <- weights[S_hat]
weights <- weights / max(weights)          # normalisation
lambda_hat <- 0.002

STEP II : CLEANING (Ridge sur D2)

X_D2_sel <- X_D2[, S_hat, drop = FALSE]

# Pondération Ridge : mettre plus de pénalité sur les petites β_EN
ridge_penalty <- 1 / (weights + 1e-6)

set.seed(123)
cv_ridge <- cv.glmnet(
  X_D2_sel, y_D2,
  alpha = 0,                     # alpha = 0 → Ridge
  nfolds = 10,
  penalty.factor = ridge_penalty
)

lambda_ridge <- cv_ridge$lambda.min

# Ajustement final sur D2
ridge_model <- glmnet(
  X_D2_sel, y_D2,
  alpha = 0,
  lambda = lambda_ridge,
  penalty.factor = ridge_penalty
)

# Coefficients ridge finaux (sur D2)
beta_clean <- coef(ridge_model)

STEP III : REFIT FINAL SUR D = D1 ∪ D2 (train complet)

X_train_sel <- X_train[, S_hat, drop = FALSE]

ridge_final <- glmnet(
  X_train_sel, y_train,
  alpha = 0,
  lambda = lambda_ridge,
  penalty.factor = ridge_penalty
)

# Prédictions sur le test externe
X_test_sel <- X_test[, S_hat, drop = FALSE]
y_pred <- predict(ridge_final, newx = X_test_sel)

# R² externe
R2_test <- 1 - sum((y_test - y_pred)^2) / sum((y_test - mean(y_test))^2)
cat("R2 externe :", R2_test, "\n")
R2 externe : 0.2161655 
---
title: "Clustering LD phenotypique based"
format: html
output: html_notebook
editor: visual
---
```{r}
library(data.table)
library(ggplot2)
library(caret)
library(nnet)
library(pls)
library(FactoMineR)
library(qqman)
library(mixOmics)

```


# import des datas
```{r}
data(Phenotype)
data(Genomic)
data("localisation")
```


```{r}
pheno <- as.data.table(Phenotype)
loc_df<- as.data.table(localisation)
geno<- as.data.table(Genomic)
geno[, ID := rownames(Genomic)]
pheno[, ID := rownames(Phenotype)]

merged<- merge(pheno, loc_df[,list(Population,ID)], by = "ID")
merged<- merge(merged, geno, by = "ID")
```



# LM (Pheno ~ population )

```{r}

df <- as.data.table(merged)

phenos <- names(df)[sapply(df, is.numeric)]
phenos <- setdiff(phenos, c("class_index","Lat","Lgn","Elev","ID"))  

get_r2 <- function(var) {
  form<- as.formula(paste(var, "~ Population + Elev + Lat + Lgn "))
  mod<- lm(form, data = df)
  summary(mod)$r.squared
}

r2_values<- data.table(
  phenotype = phenos,
  r2= sapply(phenos, get_r2)

)
```



```{r}

ggplot(r2_values, aes(x = reorder(phenotype, r2), y = r2)) +
  geom_point(size = 3) +
  geom_segment(aes(x = phenotype, xend = phenotype, y = 0, yend = r2)) +
  coord_flip() +
  ylab("R² lm(Phénotype ~ Population)") +
  xlab("Caractère phénotypique") +
  theme_minimal()
```

# Data visualisation 

Groupes de variables : chromosomes, phénotypes, populations 

constructions des axes : chromosomes => on sépare les individus selon leur distances génomiques et on projette en supplémentaires les phénotypes / population 

on pourra voir quels chromosomes ne sont pas dutout corrélés avec des traits phénotypiques 

# Réccupération des positions : 

```{r}
geno_mat <- as.matrix(geno)
coords <- do.call(rbind, strsplit(colnames(geno), "_"))
chrom <- as.factor(coords[,1])
pos <- as.numeric(coords[,2])
```




## PCA


```{r}
pca <- PCA(merged[,2:217022], quali.sup=21 ,quanti.sup=(2:20) )
```
```{r}
plot(pca,choix="ind",habillage=21)
```
```{r}
plot(pca,choix="var", #invisible = c("quali","var"),
     label = c("quanti.sup"),
     select = "coord 10" )
```

## MixOmic 

### sPCA

```{r}
X<- as.matrix(merged[,23:217022])
X2 <- X[, apply(X, 2, sd) != 0]

Y <- as.matrix(merged$CIRC2011)

result.spca.multi <- spca(X2, keepX = c(50, 30))

plotIndiv(result.spca.multi)  
plotVar(result.spca.multi)   

# extract the variables used to construct the first PC
selectVar(result.spca.multi, comp = 1)$name 
# depict weight assigned to each of these variables
plotLoadings(result.spca.multi, method = 'mean', contrib = 'max')
```

### block PLS 

```{r}
pls.result <- mixOmics::pls(X, Y) 
plotIndiv(pls.result) 
```

```{r}
plotIndiv(pls.result, group = merged$Population, 
          rep.space = 'XY-variate', 
          ellipse = TRUE,  # plot using the ellipses
          legend = TRUE)
```

### block sPLS
```{r}
head(merged[,2:21])
```

```{r}
Y <- as.matrix(merged[,2:21])

spls.result <- mixOmics::spls(X2, Y, ncomp=5, keepX = c(rep(1000,5))) 

plotIndiv(spls.result, group = merged$Population, 
          rep.space = 'XY-variate', 
          ellipse = TRUE,  # plot using the ellipses
          legend = TRUE)
```


```{r}
# Identifier les coefficients non nuls
nonzero_idx <- loadX != 0
df_nonzero_X <- data.frame(
  SNP = names(loadX)[nonzero_idx],
  loading  = loadX[nonzero_idx]
)
coords <- do.call(rbind, strsplit(rownames(df_nonzero_X), "_"))
coords[,1] <- as.numeric(sub("Chr", "", coords[,1]))
pos <- as.numeric(coords[,2])
merged_nonzero <- cbind(coords,df_nonzero_X)
merged_nonzero$"2" <- as.numeric(merged_nonzero$"2")
merged_nonzero$"1" <- as.numeric(merged_nonzero$"1")
merged_nonzero$loading <- abs(merged_nonzero$loading)
```


```{r}
manhattan(merged_nonzero, chr = "1", bp = "2", p = "loading",snp = "SNP",
          col = c("blue4", "orange3"),  # couleurs alternées par chromosome
          cex = 0.6)  # taille des points
```
### block PLS_DA

```{r}
# use the mirna, mrna and protein expression levels as predictive datasets
# note that each dataset is measured across the same individuals (samples)

X1<- as.matrix(merged[,23:217022])
X1 <- X1[, apply(X1, 2, sd) != 0]
X2 <- as.matrix(merged[,2:21])

Y <- as.factor(merged$Population)

X <- list(geno = X1, pheno = X2)

```



```{r}
result.diablo.tcga <- block.plsda(X, Y) # run the method

```
```{r}
plotIndiv(result.diablo.tcga,
          rep.space = 'XY-variate', 
          ellipse = TRUE,  # plot using the ellipses
          legend = TRUE)
```

### Block sPLS DA

```{r}
# set the number of features to use for the X datasets
list.keepX = list(geno = c(rep(10000,5)), pheno = c(rep(20,5)))
list.keepX
```


```{r}
# run the method
result.sparse.diablo.tcga <-  block.splsda(X, Y, keepX = list.keepX, ncomp = 5)
```


```{r}
plotLoadings(result.sparse.diablo.tcga, ncomp = 5)
```


```{r}
plotIndiv(result.sparse.diablo.tcga,ellipse = TRUE) 
```


```{r}
plotVar(result.sparse.diablo.tcga) # plot the variables
```

### Boostrapped sPLS 

```{r}
library(spls)
y <- merged$CIRC2009  
y <- as.numeric(y)
X <- as.matrix(merged[,23:217022])
```


```{r}
# Split stratifié 
# Ici, on fait un split simple car phénotype continu
train_index <- createDataPartition(y, p = 0.9, list = FALSE)

X_train <- X[train_index, ]
var_threshold <- 1e-8

nonzero_cols <- apply(X_train, 2, var) > var_threshold

length(nonzero_cols)
X_train <- X_train[, nonzero_cols]

X_test  <- X[-train_index, ]
X_test  <- X_test[, nonzero_cols]


y_train <- y[train_index]

y_test  <- y[-train_index]
```



```{r}
eta = seq(0.1,0.9,0.1)
K = c(1:5)
cv.spls( X_train, y_train, fold=5, K, eta, kappa=0.5, select="pls2", fit="simpls",scale.x=FALSE, scale.y=FALSE, plot.it=TRUE )
```
```{r}
model <- spls(X_train, y_train, K = 4, eta =0.2 , kappa=0.5, select="pls2", fit="simpls",
scale.x=FALSE, scale.y=FALSE, eps=1e-4, maxstep=100, trace=FALSE)
```

```{r}
ped <- predict.spls( model, X_test, type = c("fit","coefficient"))
```

```{r}
ped
```

```{r}
f <- spls::ci.spls( model, coverage=0.95, B=1000,
                    plot.it=TRUE, plot.fix="y",
                    plot.var=NA, K=model$K, fit=model$fit )


```
```{r}
names(f)
```

```{r}
nonzero_idx <- which(f$betahat != 0)
betahat_nonzero <- f$betahat[nonzero_idx]
```

```{r}
length(nonzero_idx)
```


## MFA 
```{r}
res = MFA(geno_mat, group=c(25974,16646,12284,11790,14769,17004,8327,14040,9840,15964,7483,7403,8040,11123,8486,7629,6747,8267,4876,308), type=c(rep("s",20)), ncp=5, name.group=c("Chr01","Chr02","Chr03","Chr04", "Chr05","Chr06","Chr07","Chr08","Chr09","Chr10", "Chr11","Chr12","Chr13", "Chr14","Chr15","Chr16","Chr17","Chr18","Chr19", "scaffold"))
```


```{r}
dim(geno_mat)
```


# PLS : Phenotype ~ all SNP 

```{r}

# Préparation des donnée

y <- pheno$CIRC2009  
y <- as.numeric(y)
X <- as.matrix(geno)  

# Split stratifié 
# Ici, on fait un split simple car phénotype continu
train_index <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_index, ]
X_test  <- X[-train_index, ]
y_train <- y[train_index]
y_test  <- y[-train_index]
```


```{r}
#  PLS 

pls_model <- plsr(y_train ~ X_train, ncomp = 20, validation = "CV")
```

```{r}
y_pred <- predict(pls_model, newdata = X_test, ncomp = 2)

# Evaluation 
R2_test <- cor(y_test, y_pred)^2
cat("R² sur l'ensemble de test :", round(R2_test, 3), "\n")

RMSE_test <- sqrt(mean((y_test - y_pred)^2))
cat("RMSEP sur l'ensemble de test :", round(RMSE_test, 3), "\n")
```


```{r}

y_pred_vec <- as.vector(y_pred)

plot_df <- data.frame(
  Observed = y_test,
  Predicted = y_pred_vec,
  Population = merged$Population[-train_index]
)

ggplot(plot_df, aes(x = Observed, y = Predicted, color = Population)) +
  geom_point(alpha = 0.7, size = 2) +
  geom_abline(slope = 1, intercept = 0, color = "black", linetype = "dashed") +
  theme_minimal() +
  labs(title = "Performance PLS : Observé vs Prédit",
       subtitle = paste("R² =", round(cor(plot_df$Observed, plot_df$Predicted)^2, 3)),
       x = "Phénotype Observé",
       y = "Phénotype Prédit",
       color = "Population") +
  theme(plot.title = element_text(face = "bold", size = 14),
        plot.subtitle = element_text(size = 12),
        legend.position = "right")
```


```{r}
weights_comp1 <- pls_model$loading.weights[, 1]

weights_abs <- abs(weights_comp1)

top20_idx <- order(weights_abs, decreasing = TRUE)[1:20]
top20_snps <- rownames(pls_model$loading.weights)[top20_idx]
top20_values <- weights_comp1[top20_idx]

data.frame(SNP = top20_snps, Weight = top20_values)

```

```{r}
hist(weights_comp1, breaks = seq(min(weights_comp1), max(weights_comp1), length.out = 50),
     main = "Distribution des poids de la 1ère composante",
     xlab = "Poids de chargement", col = "lightblue", border = "white")
```


# PLS Subset SNP 




```{r}
set.seed(123)

y <- pheno$CIRC2009  
y <- as.numeric(y)
X <- as.matrix(geno) 


# Paramètres
n_iter <- 10
subset_sizes <- c(1000, 5000)
ncomp_candidates <- 1:5 

results <- data.frame(
  subset_size = integer(),
  iter = integer(),
  outer_fold = integer(),
  ncomp_opt = integer(),
  R2 = numeric(),
  RMSE = numeric()
)

# Folds externes
outer_folds <- createFolds(y, k = 5, returnTrain = TRUE)

for(n_snps in subset_sizes){
  cat("Taille du subset :", n_snps, "\n")
  
  for(i in 1:n_iter){
    
    # Tirage aléatoire de SNP
    snp_cols <- sample(colnames(X), n_snps)
    X_sub <- X[, snp_cols]
    
    for(f in 1:5){
      
      # Split externe
      train_index <- outer_folds[[f]]
      test_index  <- setdiff(seq_len(nrow(X_sub)), train_index)
      
      X_train <- X_sub[train_index, ]
      X_test  <- X_sub[test_index, ]
      y_train <- y[train_index]
      y_test  <- y[test_index]
      
      # -----------------------------
      # Inner 5-fold CV pour choisir ncomp
      # -----------------------------
      pls_model <- plsr(y_train ~ X_train, ncomp = max(ncomp_candidates), validation = "CV", segments = 5)
      bestncomp = selectNcomp(pls_model,method="randomization") 
      y_pred <- predict(pls_model,  newdata = X_test, ncomp = bestncomp)
      
      # -----------------------------
      # Modèle final sur l'entraînement externe
      # -----------------------------
      final_model <- plsr(y_train ~ X_train, ncomp = bestncomp, validation = "none")
      y_pred <- predict(final_model, newdata = X_test, ncomp = bestncomp)
      
      # Performance externe
      R2_test <- cor(y_test, y_pred)^2
      RMSE_test <- sqrt(mean((y_test - y_pred)^2))
      
      results <- rbind(results, data.frame(
        subset_size = n_snps,
        iter = i,
        outer_fold = f,
        ncomp_opt = bestncomp,
        R2 = R2_test,
        RMSE = RMSE_test
      ))
    }
  }
}

```




```{r}
# Boxplots
ggplot(results, aes(x = factor(subset_size), y = R2, fill = factor(subset_size))) +
  geom_boxplot() + theme_minimal() +
  labs(title = "R² selon la taille du sous-ensemble de SNP", x = "Nombre de SNP", y = "R² sur l'échantillon test") +
  scale_fill_brewer(palette = "Set2") + theme(legend.position = "none")

ggplot(results, aes(x = factor(subset_size), y = RMSE, fill = factor(subset_size))) +
  geom_boxplot() + theme_minimal() +
  labs(title = "RMSEP selon la taille du sous-ensemble de SNP", x = "Nombre de SNP", y = "RMSEP") +
  scale_fill_brewer(palette = "Set2") + theme(legend.position = "none")
```
```{r}
results$subset_size <- as.factor(results$subset_size)
summary(results)
```
```{r}
setDT(results)
compact_summary <- results[
  , {
      stats <- function(x) {
        c(
          Min      = min(x, na.rm = TRUE),
          Q1       = quantile(x, 0.25, na.rm = TRUE),
          Median   = median(x, na.rm = TRUE),
          Mean     = mean(x, na.rm = TRUE),
          SD       = sd(x, na.rm = TRUE),
          Q3       = quantile(x, 0.75, na.rm = TRUE),
          Max      = max(x, na.rm = TRUE)
        )
      }
      
      data.table(
        Statistic = names(stats(iter)),
        iter      = stats(iter),
        ncomp_opt = stats(ncomp_opt),
        R2        = stats(R2),
        RMSE      = stats(RMSE)
      )
    },
  by = subset_size
]

compact_summary
```
# PLS Subset SNP | calcul de VIP | nested CV 

$$VIP_{j} = \sqrt{ p \frac{\displaystyle \sum_{h=1}^{A} SS_{Y,h}\frac{w_{jh}^{2}}{\lVert w_{h} \rVert^{2}}}{\displaystyle \sum_{h=1}^{A} SS_{Y,h}} }$$

```{r}
library(pls)
library(data.table)
library(caret)
library(foreach)
library(doParallel)
library(progressr)
```

```{r}
set.seed(123)
# Paramètres
n_iter <- 400
subset_size <- 10000     # nombre de SNP aléatoires par itération
ncomp_candidates <- 1:5  # candidats pour ncomp
nfold_outer <- 5
nfold_inner <- 10
```


```{r}

y <- pheno$CIRC2009
y <- as.numeric(y)
X <- as.matrix(geno)

# Fonction pour calculer les VIP 
calc_vip <- function(pls_model) {
  W <- pls_model$loading.weights
  SSY <- colSums(pls_model$Yscores^2)
  p <- nrow(W)
  A <- ncol(W)
  vip <- numeric(p)
  for (j in 1:p) {
    vip[j] <- sqrt(p * sum(SSY * (W[j, ]^2)) / sum(SSY))
  }
  names(vip) <- rownames(W)
  return(vip)
}

# Préparer cluster parallèle
n_cores <- parallel::detectCores() - 2
cl <- makeCluster(n_cores)
registerDoParallel(cl)

# --- Pipeline principal ---

out <- foreach(i = 1:n_iter, .packages = c("pls","caret","data.table")) %dopar% {

  # 1) Tirage aléatoire SNP
  snp_cols <- sample(colnames(X), subset_size)
  X_sub <- X[, snp_cols]

  # 2) Split en folds externes
  folds <- createFolds(y, k = nfold_outer, list = TRUE)

  # Tables locales
  perf_list <- list()
  vip_list  <- list()

  for(f in seq_len(nfold_outer)) {

    test_idx  <- folds[[f]]
    train_idx <- setdiff(seq_len(nrow(X_sub)), test_idx)

    X_train <- X_sub[train_idx, ]
    y_train <- y[train_idx]
    X_test  <- X_sub[test_idx, ]
    y_test  <- y[test_idx]

    # 3) Sélection ncomp via CV interne
    pls_cv <- plsr(y_train ~ X_train,
                   ncomp = max(ncomp_candidates),
                   validation = "CV",
                   segments = nfold_inner)

    best_ncomp <- selectNcomp(pls_cv, method = "onesigma") + 1

    # Ajustement final
    pls_model <- plsr(y_train ~ X_train, ncomp = best_ncomp)

    # 4) Performance test
    y_pred <- predict(pls_model, newdata = X_test, ncomp = best_ncomp)
    R2_test  <- cor(y_test, y_pred)^2
    RMSE_test <- sqrt(mean((y_test - y_pred)^2))

    # 5) VIP
    vip <- calc_vip(pls_model)

    # 6) Stockage séparé

    # performances fold-level
    perf_list[[f]] <- data.table(
      iter = i,
      fold = f,
      ncomp = best_ncomp,
      R2_test = R2_test,
      RMSE_test = RMSE_test
    )

    # VIP SNP-level
    vip_list[[f]] <- data.table(
      SNP = names(vip),
      VIP = as.numeric(vip),
      iter = i,
      fold = f
    )
  }

  list(
    perf = rbindlist(perf_list),
    vip  = rbindlist(vip_list)
  )
}

stopCluster(cl)

# Reconstruction finale dans l'ordre
results_perf <- rbindlist(lapply(out, `[[`, "perf"))
results_vip  <- rbindlist(lapply(out, `[[`, "vip"))

```

```{r}
results_perf
vip_perf<- results_perf[, .(R2_test_mean = mean(R2_test)), by = iter]
vip_perf_fold<- results_perf[, .(R2_test_mean = mean(R2_test)), by = fold]
```
```{r}
ggplot(results_perf[1:1000,], aes(x = factor(iter), y = R2_test)) +
  geom_violin(fill = "skyblue", color = "black") +
  labs(x = "Itération", y = expression(R^2), title = "R² 10-outer-fold ") +
  theme_minimal()
```
```{r}
mean(vip_perf_fold$R2_test_mean)
```

```{r}
vip_perf_fold
```

```{r}
library(data.table)
library(ggplot2)


# Calcul du VIP moyen par SNP
vip_mean<- results_vip[, .(VIP_mean = mean(VIP),SNP_count = .N), by = SNP]
```


```{r}
# Sélection des 30 SNP ayant le VIP moyen le plus élevé
topN<- vip_mean[order(-VIP_mean)][0:5000, SNP]

df_topNPLS<- results_vip[SNP %in% topN]
df_topNPLS <- merge(df_topNPLS, vip_mean[, .(SNP, SNP_count)], by = "SNP")
df_topNPLS$SNP_count <- df_topNPLS$SNP_count/10

```


```{r}
ggplot(df_topNPLS, aes(x = reorder(SNP,VIP), y = VIP, fill = SNP_count)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.1, outlier.size = 0.5) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  ) +
  labs(
    x = "SNP",
    y = "VIP",
    title = "Distribution du VIP pour les n = 50 SNP les plus importants",
    fill = "Nbr de selection du SNP"
  )
```


```{r}
hist(vip_mean$VIP_mean, breaks = seq(min(vip_mean$VIP_mean), max(vip_mean$VIP_mean), length.out = 50),
     main = "Distribution des scores VIP moyens",
     xlab = "scores VIP moyens", col = "lightblue", border = "white")
```
# Lasso 

```{r}
library(glmnet)

# Assurer que y est numérique et X est une matrice
y <- as.numeric(pheno$CIRC2009)
X <- as.matrix(geno)

train_index <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_index, ]
X_test  <- X[-train_index, ]
y_train <- y[train_index]
y_test  <- y[-train_index]

# Ajustement du Lasso avec validation croisée pour choisir lambda
set.seed(123)

cv_lasso <- cv.glmnet(X_train, y_train, alpha = 1, nfolds = 5)

# Afficher la meilleure valeur de lambda
best_lambda <- cv_lasso$lambda.min
print(best_lambda)

# Tracer l'erreur de validation croisée
plot(cv_lasso)

# Ajuster le modèle final avec lambda optimal
lasso_model <- glmnet(X_train, y_train, alpha = 1, lambda = best_lambda)

# Extraire les coefficients
coef(lasso_model)
```
```{r}
print(best_lambda)
```


```{r}
y_pred <- predict(lasso_model, newx = X_test)

# Calcul du R²
SSE <- sum((y_test - y_pred)^2)           # somme des carrés des erreurs
SST <- sum((y_test - mean(y))^2)         # somme totale des carrés
R2 <- 1 - SSE/SST
print(sqrt(SSE))
R2
```


```{r}
coef_df <- data.frame(
  SNP = rownames(coef(lasso_model)),
  Coefficient = as.numeric(coef(lasso_model))
)
coef_nonzero <- coef_df[coef_df$Coefficient != 0, ]
coef_nonzero_dt <- as.data.table(coef_nonzero)

# Top N SNP par valeur absolue des coefficients
N <- 50
coef_nonzero_dt[, abs_coef := abs(Coefficient)]
topN_Lasso <- coef_nonzero_dt[order(-abs_coef)][1:N, .(SNP, Coefficient)]

topN_Lasso
```

# Comparaison selection Lasso et PLS 

```{r}

common_snps <- intersect(df_topNPLS$SNP, coef_nonzero$SNP)

# SNP communs
common_snps
length(common_snps)

```
# réegression ridge adaptative

```{r}
y <- as.numeric(pheno$CIRC2009)
X <- as.matrix(geno)

# Split externe train/test
set.seed(123)
train_index <- createDataPartition(y, p = 0.9, list = FALSE)
X_train <- X[train_index, ]
X_test  <- X[-train_index, ]
y_train <- y[train_index]
y_test  <- y[-train_index]

# Split interne du train en D1 et D2 : 50/50
set.seed(123)
idx_D1 <- createDataPartition(y_train, p = 0.5, list = FALSE)

X_D1 <- X_train[idx_D1, ]
X_D2 <- X_train[-idx_D1, ]
y_D1 <- y_train[idx_D1]
y_D2 <- y_train[-idx_D1]
```




### STEP I : SCREENING (Elastic Net sur D1)



```{r}
alpha <- 1 # Elastic Net si alpha = 0.5, Lasso si = 1, ridge si =0 
set.seed(123)
cv_enet <- cv.glmnet(
  X_D1, y_D1,
  alpha = alpha,               
  nfolds = 10,
  standardize = TRUE
)

lambda_hat <- cv_enet$lambda.min
```

```{r}
lambda_hat
```


```{r}
# Ajustement final 
enet_model <- glmnet(X_D1, y_D1, alpha = alpha, lambda = 0.002)


coefs <- coef(enet_model)
selected <- which(coefs[-1] != 0)          # indices des SNP sélectionnés
S_hat <- selected

cat("Nombre de variables sélectionnées (screening):", length(S_hat), "\n")

# Poids dérivés des coefficients Elastic Net
beta_hat <- as.numeric(coefs[-1])
weights <- abs(beta_hat)
weights <- weights[S_hat]
weights <- weights / max(weights)          # normalisation
```

```{r}
lambda_hat <- 0.002
```


## STEP II : CLEANING (Ridge sur D2)



```{r}
X_D2_sel <- X_D2[, S_hat, drop = FALSE]

# Pondération Ridge : mettre plus de pénalité sur les petites β_EN
ridge_penalty <- 1 / (weights + 1e-6)

set.seed(123)
cv_ridge <- cv.glmnet(
  X_D2_sel, y_D2,
  alpha = 0,                     # alpha = 0 → Ridge
  nfolds = 10,
  penalty.factor = ridge_penalty
)

lambda_ridge <- cv_ridge$lambda.min

# Ajustement final sur D2
ridge_model <- glmnet(
  X_D2_sel, y_D2,
  alpha = 0,
  lambda = lambda_ridge,
  penalty.factor = ridge_penalty
)

# Coefficients ridge finaux (sur D2)
beta_clean <- coef(ridge_model)
```



# STEP III : REFIT FINAL SUR D = D1 ∪ D2 (train complet)



```{r}
X_train_sel <- X_train[, S_hat, drop = FALSE]

ridge_final <- glmnet(
  X_train_sel, y_train,
  alpha = 0,
  lambda = lambda_ridge,
  penalty.factor = ridge_penalty
)

# Prédictions sur le test externe
X_test_sel <- X_test[, S_hat, drop = FALSE]
y_pred <- predict(ridge_final, newx = X_test_sel)

# R² externe
R2_test <- 1 - sum((y_test - y_pred)^2) / sum((y_test - mean(y_test))^2)
cat("R2 externe :", R2_test, "\n")
```




